1 Basic Approach for Time Series Analysis

  1. Load Data
  2. Create and Plot Time Series
  3. Clean / Preprocess / Transform Time Series
  4. Decompose Time Series
  5. Holt-Winters Models: Exponential Smoothing
    1. Fit
    2. Forecast
    3. Residual Analysis
  6. ARIMA / SARIMA Models
    1. Model Selection ARIMA(p,d,q)(P,D,Q)
    2. Fit
    3. Forecast
    4. Residual Analysis
  7. Generalized Linear Regression
    1. Prepare Data Frame
    2. Fit GLS
    3. Residual Analysis

2 Load Libraries

library(statsr)
library(dplyr)
library(MASS)
library(BAS)
library(ggplot2)
library(devtools)
library(gridExtra)
library(grid)
library(GGally)
library(PreProcess)
library(tidyverse)
library(knitr)

library(forecast)
library(nlme)
library(TTR)

3 Helper Function

# Function to plot Normal QQ
qqplot.data <- function (vec) # argument: vector of numbers
{
  y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
  x <- qnorm(c(0.25, 0.75))
  slope <- diff(y)/diff(x)
  int <- y[1L] - slope * x[1L]
  d <- data.frame(resids = vec)
  ggplot(d, aes(sample = resids)) + stat_qq() + geom_abline(slope = slope, intercept = int) + xlab("Theoretical Quantiles") + ylab("Sample Quantiles")
}

4 Souvenir Time Series

4.1 Load Data

# Load data
souvenir <- scan("http://robjhyndman.com/tsdldata/data/fancy.dat")

4.2 Create and Plot Time Series

# Create time series object
souvenirtimeseries <- ts(souvenir, frequency=12, start=c(1987,1))

# Plot time series
plot(souvenirtimeseries)

4.3 Clean / Preprocess / Transform Time Series

# Transform
souvenir.log <- log(souvenir)

# Create time series object
souvenirtimeseries.log <- ts(souvenir.log, frequency=12, start=c(1987,1))

# Plot time series
plot(souvenirtimeseries.log)

# Show series
souvenirtimeseries.log
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1987  7.417466  7.782194  7.951809  8.173939  8.230300  8.220064  8.377841
## 1988  7.823970  8.556075  8.885322  8.477627  8.682857  8.507414  8.728931
## 1989  8.458933  8.648683  9.206089  8.576364  8.778392  8.799481  8.902404
## 1990  8.686278  8.668124  9.427164  8.759319  8.937103  8.885268  9.002236
## 1991  8.481906  8.774967  9.173549  9.084910  9.073646  9.231072  9.330481
## 1992  8.937879  9.195195  9.585923  9.357668  9.141265  9.478999  9.725125
## 1993  9.234373  9.329623  9.990896  9.761770  9.680206  9.830999 10.171801
##            Aug       Sep       Oct       Nov       Dec
## 1987  8.179295  8.521548  8.767715  8.935982  9.891223
## 1988  8.466352  8.611854  8.671647  9.441458 10.259122
## 1989  9.009034  9.056393  9.178901  9.625877 10.435909
## 1990  8.984600  8.998762  9.045077  9.793375 10.312759
## 1991  9.437653  9.361978  9.518332  9.990679 10.715766
## 1992  9.897902 10.083029 10.142164 10.491963 11.298763
## 1993 10.260691 10.325659 10.335962 10.750093 11.558479

4.4 Decompose Time Series

# Decompose time series
souvenirtimeseries.log.components <- decompose(souvenirtimeseries.log)

# Plot
plot(souvenirtimeseries.log.components)

4.5 Holt-Winters Models: Exponential Smoothing

4.5.1 Fit

# Model using Holt-Winters Smoothing
souvenirtimeseries.log.hw <- HoltWinters(souvenirtimeseries.log)

# Model specification
souvenirtimeseries.log.hw
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = souvenirtimeseries.log)
## 
## Smoothing parameters:
##  alpha: 0.413418
##  beta : 0
##  gamma: 0.9561275
## 
## Coefficients:
##            [,1]
## a   10.37661961
## b    0.02996319
## s1  -0.80952063
## s2  -0.60576477
## s3   0.01103238
## s4  -0.24160551
## s5  -0.35933517
## s6  -0.18076683
## s7   0.07788605
## s8   0.10147055
## s9   0.09649353
## s10  0.05197826
## s11  0.41793637
## s12  1.18088423
# Plot interpolation
plot(souvenirtimeseries.log.hw)

4.5.2 Forecast

# Forecast using Holt-Winters Smoothing
souvenirtimeseries.log.forecasts.hw <- forecast:::forecast.HoltWinters(souvenirtimeseries.log.hw, h=48)

# Plot forecasts
forecast:::plot.forecast(souvenirtimeseries.log.forecasts.hw)

4.5.3 Residual Analysis

To test whether there is significant evidence for non-zero correlations at lags 1-20, we can carry out a Ljung-Box test.

# Plot residuals
na.omit(souvenirtimeseries.log.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(souvenirtimeseries.log.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  souvenirtimeseries.log.forecasts.hw$residuals
## X-squared = 17.53, df = 20, p-value = 0.6183
# Plot histogram of residuals
ggplot() + aes(as.numeric(na.omit(souvenirtimeseries.log.forecasts.hw$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(souvenirtimeseries.log.forecasts.hw$residuals)))

4.6 ARIMA / SARIMA Models

4.6.1 Model Selection ARIMA(p,d,q)(P,D,Q)

# Perform seasonal differentiation with lag 12
souvenirtimeseries.log.deseasonalized <- souvenirtimeseries.log %>% diff(lag=12)
ggtsdisplay(souvenirtimeseries.log.deseasonalized, main='', theme=theme_bw())

Box.test(souvenirtimeseries.log.deseasonalized, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  souvenirtimeseries.log.deseasonalized
## X-squared = 57.561, df = 20, p-value = 1.687e-05
# Find the orders for the seasonal and non-seasonal parts -> use ACF for MA(q) and PACF for AR(p)
# BIC: ARIMA(2,0,0)(0,1,1)[12] with drift
# AIC: ARIMA(2,0,0)(0,1,2)[12] with drift
souvenirtimeseries.log.arima <- auto.arima(souvenirtimeseries.log, max.p=10, max.q=10, stationary=FALSE, seasonal=TRUE, ic="aic", stepwise=FALSE)
souvenirtimeseries.log.arima
## Series: souvenirtimeseries.log 
## ARIMA(2,0,0)(0,1,2)[12] with drift 
## 
## Coefficients:
##          ar1     ar2     sma1     sma2   drift
##       0.3559  0.2819  -0.5724  -0.2787  0.0233
## s.e.  0.1119  0.1216   0.4056   0.2829  0.0021
## 
## sigma^2 estimated as 0.02617:  log likelihood=25.55
## AIC=-39.1   AICc=-37.8   BIC=-25.44

4.6.2 Fit

# Forecast using ARIMA/SARIMA
souvenirtimeseries.log.fit.arima <- arima(souvenirtimeseries.log, order=c(2,0,0), seasonal=c(0,1,1))

4.6.3 Forecast

souvenirtimeseries.log.forecasts.arima <- souvenirtimeseries.log.fit.arima %>% forecast(h=48) %>% autoplot()
souvenirtimeseries.log.forecasts.arima

4.6.4 Residual Analysis

checkresiduals(souvenirtimeseries.log.fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0)(0,1,1)[12]
## Q* = 16.353, df = 14, p-value = 0.2923
## 
## Model df: 3.   Total lags used: 17

4.7 Generalized Linear Regression

4.7.1 Prepare Data Frame

num.resp <- as.numeric(souvenirtimeseries.log)
num.time <- as.numeric(time(num.resp))
mn01 <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun")
mn02 <- c("Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
month <- factor(cycle(souvenirtimeseries.log), labels=c(mn01, mn02))
dat <- data.frame(resp=num.resp, time=num.time, month)

4.7.2 Fit GLS

corStruct <- corARMA(form = ~ time, p=2, q=0)
souvenirtimeseries.log.fit.gls <- gls(resp ~ time + month, data=dat, corr=corStruct)
ggplot(dat, aes(x=time)) + geom_line(aes(y=resp), color="black") + geom_line(aes(y=souvenirtimeseries.log.fit.gls$fitted), color="red") 

4.7.3 Resdiaul Analysis

# Plot residuals
na.omit(souvenirtimeseries.log.fit.gls$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(souvenirtimeseries.log.fit.gls$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  souvenirtimeseries.log.fit.gls$residuals
## X-squared = 86.295, df = 20, p-value = 3.271e-10
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(souvenirtimeseries.log.fit.gls$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(souvenirtimeseries.log.fit.gls$residuals)))

4.8 Compare SSE

length.hw <- length(souvenirtimeseries.log.hw$x)
length.arima <- length(souvenirtimeseries.log.arima$residuals)
length.gls <- length(souvenirtimeseries.log.fit.gls$residuals)

length.hw
## [1] 84
length.arima
## [1] 84
length.gls
## [1] 84
cat("ES:\t", souvenirtimeseries.log.hw$SSE, "\n")
## ES:   2.011491
cat("ARIMA:\t", sum(souvenirtimeseries.log.arima$residuals^2), "\n")
## ARIMA:    1.753194
cat("GLS:\t", sum(souvenirtimeseries.log.fit.gls$residuals^2), "\n")
## GLS:  2.496712

5 Kings Time Series

5.1 Load Data

kings <- scan("http://robjhyndman.com/tsdldata/misc/kings.dat", skip=3)

5.2 Create and Plot Time Series

kings <- ts(kings)
kings %>% ggtsdisplay(main='', theme=theme_bw())

5.3 Decompose Time Series (Non-seasonal)

kings.sma <- SMA(kings, n=8)
plot.ts(kings.sma)

5.4 Holt-Winters Models: Exponential Smoothing

# Fit
kings.hw <- HoltWinters(kings, gamma=FALSE)
kings.hw
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = kings, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.5590871
##  beta : 0.2367654
##  gamma: FALSE
## 
## Coefficients:
##        [,1]
## a 64.804647
## b -1.679965
plot(kings.hw)

# Forecast using Holt-Winters Smoothing
kings.forecasts.hw <- forecast:::forecast.HoltWinters(kings.hw, h=10)

# Plot forecasts
forecast:::plot.forecast(kings.forecasts.hw)

# Plot residuals
na.omit(kings.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(kings.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  kings.forecasts.hw$residuals
## X-squared = 14.195, df = 20, p-value = 0.8205
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(kings.forecasts.hw$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(kings.forecasts.hw$residuals)))

5.5 ARIMA Models

kings.detrend <- kings %>% diff(lag=1)
ggtsdisplay(kings.detrend, main='d=1', theme=theme_bw())

Box.test(kings.detrend, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  kings.detrend
## X-squared = 25.103, df = 20, p-value = 0.1975
# BIC: ARIMA(0,1,1)
# AIC: ARIMA(0,1,1)
kings.arima <- auto.arima(kings, max.p=10, max.q=10, stationary=FALSE, seasonal=FALSE, ic="bic", stepwise=FALSE)
kings.arima
## Series: kings 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.7218
## s.e.   0.1208
## 
## sigma^2 estimated as 236.2:  log likelihood=-170.06
## AIC=344.13   AICc=344.44   BIC=347.56
kings.fit.arima <- arima(kings, order=c(0,1,1))
kings.forecasts.arima <- kings.fit.arima %>% forecast(h=10) %>% autoplot()
kings.forecasts.arima

checkresiduals(kings.fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)
## Q* = 4.2785, df = 7, p-value = 0.7472
## 
## Model df: 1.   Total lags used: 8

5.6 Generalized Linear Regression

num.resp <- as.numeric(kings)
num.time <- as.numeric(time(num.resp))
dat <- data.frame(resp=num.resp, time=num.time)

corStruct <- corARMA(form = ~ time, p=0, q=1)
kings.fit.gls <- gls(resp ~ time, data=dat, corr=corStruct)
ggplot(dat, aes(x=time)) + geom_line(aes(y=resp), color="black") + geom_line(aes(y=kings.fit.gls$fitted), color="red") 

# Plot residuals
na.omit(kings.fit.gls$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(kings.fit.gls$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  kings.fit.gls$residuals
## X-squared = 21.822, df = 20, p-value = 0.3503
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(kings.fit.gls$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(kings.fit.gls$residuals)))

5.7 Compare SSE

length.hw <- length(kings.hw$x)
length.arima <- length(kings.arima$residuals)
length.gls <- length(kings.fit.gls$residuals)

length.hw
## [1] 42
length.arima
## [1] 42
length.gls
## [1] 42
cat("ES:\t", kings.hw$SSE, "\n")
## ES:   14115.41
cat("ARIMA:\t", sum(kings.arima$residuals^2), "\n")
## ARIMA:    9447.93
cat("GLS:\t", sum(kings.fit.gls$residuals^2), "\n")
## GLS:  9429.192

6 NY Births Time Series

##Load Data

births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")
births <- ts(births, frequency = 12, start = c(1946, 1))
births %>% ggtsdisplay(main='', theme=theme_bw())

6.1 Decompose Time Series

births.components <- decompose(births)
plot(births.components)

6.2 Holt-Winters Models: Exponential Smoothing

# Fit
births.hw <- HoltWinters(births)
births.hw
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = births)
## 
## Smoothing parameters:
##  alpha: 0.4823655
##  beta : 0.02988495
##  gamma: 0.563186
## 
## Coefficients:
##            [,1]
## a   28.04366357
## b    0.04199921
## s1  -0.78546221
## s2  -2.19944507
## s3   0.87813012
## s4  -0.65164728
## s5   0.63427267
## s6   0.21182821
## s7   2.23177191
## s8   2.17167733
## s9   1.52077678
## s10  1.16900861
## s11 -0.97500043
## s12 -0.18636055
plot(births.hw)

# Forecast using Holt-Winters Smoothing
births.forecasts.hw <- forecast:::forecast.HoltWinters(births.hw, h=48)

# Plot forecasts
forecast:::plot.forecast(births.forecasts.hw)

# Plot residuals
na.omit(births.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(births.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  births.forecasts.hw$residuals
## X-squared = 53.506, df = 20, p-value = 6.844e-05
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(births.forecasts.hw$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(births.forecasts.hw$residuals)))

6.3 ARIMA / SARIMA Models

births.deseasonalized <- births %>% diff(lag=12) %>% diff(lag=1)
ggtsdisplay(births.deseasonalized, main='d=1', theme=theme_bw())

Box.test(births.deseasonalized, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  births.deseasonalized
## X-squared = 75.634, df = 20, p-value = 2.135e-08
# AIC: ARIMA(2,1,1)(1,1,1)[12]
# BIC: ARIMA(2,1,1)(1,1,1)[12]
births.arima <- auto.arima(births, max.p=10, max.q=10, stationary=FALSE, seasonal=TRUE, ic="aic", stepwise=FALSE)
births.arima
## Series: births 
## ARIMA(2,1,1)(1,1,1)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1     sar1     sma1
##       0.4349  -0.241  -0.4999  -0.2474  -0.8465
## s.e.  0.1846   0.085   0.1854   0.0986   0.1004
## 
## sigma^2 estimated as 0.406:  log likelihood=-157.78
## AIC=327.56   AICc=328.12   BIC=345.82
births.fit.arima <- arima(births, order=c(2,1,1), seasonal=c(1,1,1))
births.forecasts.arima <- births.fit.arima %>% forecast(h=48) %>% autoplot()
births.forecasts.arima

checkresiduals(births.fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(1,1,1)[12]
## Q* = 25.844, df = 19, p-value = 0.1346
## 
## Model df: 5.   Total lags used: 24

6.4 Generalized Linear Regression

num.resp <- as.numeric(births)
num.time <- as.numeric(time(num.resp))
mn01 <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun")
mn02 <- c("Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
month <- factor(cycle(births), labels=c(mn01, mn02))
dat <- data.frame(resp=num.resp, time=num.time, month)

corStruct <- corARMA(form = ~ time, p=0, q=1)
births.fit.gls <- gls(resp ~ time + month, data=dat, corr=corStruct)
ggplot(dat, aes(x=time)) + geom_line(aes(y=resp), color="black") + geom_line(aes(y=births.fit.gls$fitted), color="red") 

# Plot residuals
na.omit(births.fit.gls$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(births.fit.gls$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  births.fit.gls$residuals
## X-squared = 269.42, df = 20, p-value < 2.2e-16
# Plot histogram of residuals
ggplot() + aes(as.numeric(na.omit(births.fit.gls$residuals))) + geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(births.fit.gls$residuals)))

6.5 Compare SSE

length.hw <- length(births.hw$x)
length.arima <- length(births.arima$residuals)
length.gls <- length(births.fit.gls$residuals)

length.hw
## [1] 168
length.arima
## [1] 168
length.gls
## [1] 168
cat("ES:\t", births.hw$SSE, "\n")
## ES:   90.94058
cat("ARIMA:\t", sum(births.arima$residuals^2), "\n")
## ARIMA:    60.9012
cat("GLS:\t", sum(births.fit.gls$residuals^2), "\n")
## GLS:  186.657

7 Rain Time Series

##Load Data

rain <- scan("http://robjhyndman.com/tsdldata/hurst/precip1.dat", skip=1)

7.1 Create and Plot Time Series

rain <- ts(rain, start=c(1813))
rain %>% ggtsdisplay(main='', theme=theme_bw())

Box.test(rain, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  rain
## X-squared = 17.477, df = 20, p-value = 0.6218

7.2 Holt-Winters Models: Exponential Smoothing

# Fit
rain.hw <- HoltWinters(rain, beta=FALSE, gamma=FALSE)
rain.hw
## Holt-Winters exponential smoothing without trend and without seasonal component.
## 
## Call:
## HoltWinters(x = rain, beta = FALSE, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.02412151
##  beta : FALSE
##  gamma: FALSE
## 
## Coefficients:
##       [,1]
## a 24.67819
plot(rain.hw)

# Forecast using Holt-Winters Smoothing
rain.forecasts.hw <- forecast:::forecast.HoltWinters(rain.hw, h=10)

# Plot forecasts
forecast:::plot.forecast(rain.forecasts.hw)

# Plot residuals
na.omit(rain.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(rain.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  rain.forecasts.hw$residuals
## X-squared = 17.401, df = 20, p-value = 0.6268
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(rain.forecasts.hw$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(rain.forecasts.hw$residuals)))

8 Skirts Time Series

##Load Data

skirts <- scan("http://robjhyndman.com/tsdldata/roberts/skirts.dat", skip=5)

8.1 Create and Plot Time Series

skirts <- ts(skirts, start=c(1866))
skirts %>% ggtsdisplay(main='', theme=theme_bw())

Box.test(skirts, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  skirts
## X-squared = 268.02, df = 20, p-value < 2.2e-16

8.2 Decompose Time Series

skirts.sma <- SMA(skirts, n=5)
plot.ts(skirts.sma)

8.3 Holt-Winters Models: Exponential Smoothing

# Fit
skirts.hw <- HoltWinters(skirts, beta=TRUE, gamma=FALSE)
skirts.hw
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = skirts, beta = TRUE, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.8383681
##  beta : TRUE
##  gamma: FALSE
## 
## Coefficients:
##         [,1]
## a 529.308749
## b   5.690579
plot(skirts.hw)

# Forecast using Holt-Winters Smoothing
skirts.forecasts.hw <- forecast:::forecast.HoltWinters(skirts.hw, h=10)

# Plot forecasts
forecast:::plot.forecast(skirts.forecasts.hw)

# Plot residuals
na.omit(skirts.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(skirts.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  skirts.forecasts.hw$residuals
## X-squared = 19.732, df = 20, p-value = 0.4748
# Plot histogram of residuals
ggplot()+aes(as.numeric(na.omit(skirts.forecasts.hw$residuals)))+geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(skirts.forecasts.hw$residuals)))

8.4 ARIMA / SARIMA Models

skirts.deseasonalized <- skirts %>% diff(lag=1) %>% diff(lag=1)
ggtsdisplay(skirts.deseasonalized, main='d=2', theme=theme_bw())

Box.test(skirts.deseasonalized, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  skirts.deseasonalized
## X-squared = 31.208, df = 20, p-value = 0.05251
# AIC: ARIMA(5,2,0)
# BIC: ARIMA(1,2,0)
skirts.arima <- auto.arima(skirts, max.p=10, max.q=10, stationary=FALSE, seasonal=FALSE, ic="aic", stepwise=FALSE)
skirts.arima
## Series: skirts 
## ARIMA(5,2,0) 
## 
## Coefficients:
##           ar1     ar2     ar3      ar4      ar5
##       -0.2416  0.0456  0.0842  -0.0113  -0.4198
## s.e.   0.1326  0.1365  0.1366   0.1364   0.1299
## 
## sigma^2 estimated as 343.9:  log likelihood=-188.82
## AIC=389.64   AICc=391.91   BIC=400.35
skirts.fit.arima <- arima(skirts, order=c(1,2,0))
skirts.forecasts.arima <- skirts.fit.arima %>% forecast(h=10) %>% autoplot()
skirts.forecasts.arima

checkresiduals(skirts.fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,2,0)
## Q* = 11.396, df = 8, p-value = 0.1802
## 
## Model df: 1.   Total lags used: 9

9 Volcano Dusts Time Series

##Load Data

volcanodust <- scan("http://robjhyndman.com/tsdldata/annual/dvi.dat", skip=1)

9.1 Create and Plot Time Series

volcanodust <- ts(volcanodust, start=c(1500))
volcanodust %>% ggtsdisplay(main='', theme=theme_bw())

Box.test(volcanodust, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  volcanodust
## X-squared = 321.71, df = 20, p-value < 2.2e-16

9.2 Holt-Winters Models: Exponential Smoothing

# Fit
volcanodust.hw <- HoltWinters(volcanodust, beta=FALSE, gamma=FALSE)
volcanodust.hw
## Holt-Winters exponential smoothing without trend and without seasonal component.
## 
## Call:
## HoltWinters(x = volcanodust, beta = FALSE, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.9171114
##  beta : FALSE
##  gamma: FALSE
## 
## Coefficients:
##        [,1]
## a 0.0248329
plot(volcanodust.hw)

# Forecast using Holt-Winters Smoothing
volcanodust.forecasts.hw <- forecast:::forecast.HoltWinters(volcanodust.hw, h=100)

# Plot forecasts
forecast:::plot.forecast(volcanodust.forecasts.hw)

# Plot residuals
na.omit(volcanodust.forecasts.hw$residuals) %>% ggtsdisplay(lag.max=20, main='Residual', theme=theme_bw())

# Run Ljung-Box test (null hypothesis: there is no non-zero autocorrelation in the in-sample forecast errors)
Box.test(volcanodust.forecasts.hw$residuals, lag=20, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  volcanodust.forecasts.hw$residuals
## X-squared = 58.88, df = 20, p-value = 1.06e-05
# Plot histogram of residuals
ggplot() + aes(as.numeric(na.omit(volcanodust.forecasts.hw$residuals))) + geom_histogram(aes(y=..density..), bins=30, alpha=0.75, color='blue', fill='blue') + geom_density()

# Plot quantile-quantile plot
qqplot.data(as.numeric(na.omit(volcanodust.forecasts.hw$residuals)))

9.3 ARIMA / SARIMA Models

# AIC: ARIMA(1,0,2)
# BIC: ARIMA(2,0,0)
volcanodust.arima <- auto.arima(volcanodust, max.p=10, max.q=10, stationary=TRUE, seasonal=FALSE, ic="bic", stepwise=FALSE)
volcanodust.arima
## Series: volcanodust 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2     mean
##       0.7533  -0.1268  57.5274
## s.e.  0.0457   0.0458   8.5958
## 
## sigma^2 estimated as 4901:  log likelihood=-2662.54
## AIC=5333.09   AICc=5333.17   BIC=5349.7
volcanodust.fit.arima <- arima(volcanodust, order=c(2,0,0))
volcanodust.forecasts.arima <- volcanodust %>% forecast(h=100) %>% autoplot()
volcanodust.forecasts.arima

checkresiduals(volcanodust.fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 6.176, df = 7, p-value = 0.5194
## 
## Model df: 3.   Total lags used: 10
  1. How are confidence intervals calculated in each models: e.g. Holt-Winters, ARIMA, GLS?
  2. Kalman filter, Particle filter
  3. Garch